import os
import logging
import re
from datetime import datetime, timedelta

from requests_aws4auth import AWS4Auth

from elasticsearch import RequestsHttpConnection
from elasticsearch.exceptions import NotFoundError, AuthorizationException
from elasticsearch_dsl import Index, Document, Integer, Date, Text, Ip, Keyword
from elasticsearch_dsl.connections import connections


logger = logging.getLogger(__name__)

# Name of the connection used for Elasticearch's template API
ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS = "logentry_template"

# Prefix of autogenerated indices
INDEX_NAME_PREFIX = "logentry_"

# Time-based index date format
INDEX_DATE_FORMAT = "%Y-%m-%d"

# Timeout for default connection
ELASTICSEARCH_DEFAULT_CONNECTION_TIMEOUT = 15

# Timeout for template api Connection
ELASTICSEARCH_TEMPLATE_CONNECTION_TIMEOUT = 60

# Force an index template update
ELASTICSEARCH_FORCE_INDEX_TEMPLATE_UPDATE = os.environ.get("FORCE_INDEX_TEMPLATE_UPDATE", "")

# Valid index prefix pattern
VALID_INDEX_PATTERN = r"^((?!\.$|\.\.$|[-_+])([^A-Z:\/*?\"<>|,# ]){1,255})$"


class LogEntry(Document):
    # random_id is the tie-breaker for sorting in pagination.
    # random_id is also used for deduplication of records when using a "at-least-once" delivery stream.
    # Reference: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-search-after.html
    #
    # We use don't use the _id of a document since a `doc_values` is not build for this field:
    # An on-disk data structure that stores the same data in a columnar format
    # for optimized sorting and aggregations.
    # Reference: https://github.com/elastic/elasticsearch/issues/35369
    random_id = Text(fields={"keyword": Keyword()})
    kind_id = Integer()
    account_id = Integer()
    performer_id = Integer()
    repository_id = Integer()
    ip = Ip()
    metadata_json = Text()
    datetime = Date()

    _initialized = False

    @classmethod
    def init(cls, index_prefix, index_settings=None, skip_template_init=False):
        """
        Create the index template, and populate LogEntry's mapping and index settings.
        """
        wildcard_index = Index(name=index_prefix + "*")
        wildcard_index.settings(**(index_settings or {}))
        wildcard_index.document(cls)
        cls._index = wildcard_index
        cls._index_prefix = index_prefix

        if not skip_template_init:
            cls.create_or_update_template()

        # Since the elasticsearch-dsl API requires the document's index being defined as an inner class at the class level,
        # this function needs to be called first before being able to call `save`.
        cls._initialized = True

    @classmethod
    def create_or_update_template(cls):
        assert cls._index and cls._index_prefix
        index_template = cls._index.as_template(cls._index_prefix)
        index_template.save(using=ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS)

    def save(self, **kwargs):
        # We group the logs based on year, month and day as different indexes, so that
        # dropping those indexes based on retention range is easy.
        #
        # NOTE: This is only used if logging directly to Elasticsearch
        #       When using Kinesis or Kafka, the consumer of these streams
        #       will be responsible for the management of the indices' lifecycle.
        assert LogEntry._initialized
        kwargs["index"] = self.datetime.strftime(self._index_prefix + INDEX_DATE_FORMAT)
        return super(LogEntry, self).save(**kwargs)


class ElasticsearchLogs(object):
    """
    Model for logs operations stored in an Elasticsearch cluster.
    """

    def __init__(
        self,
        host=None,
        port=None,
        access_key=None,
        secret_key=None,
        aws_region=None,
        index_settings=None,
        use_ssl=True,
        index_prefix=INDEX_NAME_PREFIX,
    ):
        # For options in index_settings, refer to:
        # https://www.elastic.co/guide/en/elasticsearch/guide/master/_index_settings.html
        # some index settings are set at index creation time, and therefore, you should NOT
        # change those settings once the index is set.
        self._host = host
        self._port = port
        self._access_key = access_key
        self._secret_key = secret_key
        self._aws_region = aws_region
        self._index_prefix = index_prefix
        self._index_settings = index_settings
        self._use_ssl = use_ssl

        self._client = None
        self._initialized = False

    def _initialize(self):
        """
        Initialize a connection to an ES cluster and creates an index template if it does not exist.
        """
        if not self._initialized:
            http_auth = None
            if self._access_key and self._secret_key and self._aws_region:
                http_auth = AWS4Auth(self._access_key, self._secret_key, self._aws_region, "es")
            elif self._access_key and self._secret_key:
                http_auth = (self._access_key, self._secret_key)
            else:
                logger.warn("Connecting to Elasticsearch without HTTP auth")

            self._client = connections.create_connection(
                hosts=[{"host": self._host, "port": self._port}],
                http_auth=http_auth,
                use_ssl=self._use_ssl,
                verify_certs=True,
                connection_class=RequestsHttpConnection,
                timeout=ELASTICSEARCH_DEFAULT_CONNECTION_TIMEOUT,
            )

            # Create a second connection with a timeout of 60s vs 10s.
            # For some reason the PUT template API can take anywhere between
            # 10s and 30s on the test cluster.
            # This only needs to be done once to initialize the index template
            connections.create_connection(
                alias=ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS,
                hosts=[{"host": self._host, "port": self._port}],
                http_auth=http_auth,
                use_ssl=self._use_ssl,
                verify_certs=True,
                connection_class=RequestsHttpConnection,
                timeout=ELASTICSEARCH_TEMPLATE_CONNECTION_TIMEOUT,
            )

            try:
                force_template_update = ELASTICSEARCH_FORCE_INDEX_TEMPLATE_UPDATE.lower() == "true"
                self._client.indices.get_template(self._index_prefix)
                LogEntry.init(
                    self._index_prefix,
                    self._index_settings,
                    skip_template_init=not force_template_update,
                )
            except NotFoundError:
                LogEntry.init(self._index_prefix, self._index_settings, skip_template_init=False)
            finally:
                try:
                    connections.remove_connection(ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS)
                except KeyError as ke:
                    logger.exception(
                        "Elasticsearch connection not found to remove %s: %s",
                        ELASTICSEARCH_TEMPLATE_CONNECTION_ALIAS,
                        ke,
                    )

            self._initialized = True

    def index_name(self, day):
        """
        Return an index name for the given day.
        """
        return self._index_prefix + day.strftime(INDEX_DATE_FORMAT)

    def index_exists(self, index):
        try:
            return index in self._client.indices.get(index)
        except NotFoundError:
            return False

    @staticmethod
    def _valid_index_prefix(prefix):
        """
        Check that the given index prefix is valid with the set of indices used by this class.
        """
        return re.match(VALID_INDEX_PATTERN, prefix) is not None

    def _valid_index_name(self, index):
        """
        Check that the given index name is valid and follows the format:

        <index_prefix>YYYY-MM-DD
        """
        if not ElasticsearchLogs._valid_index_prefix(index):
            return False

        if not index.startswith(self._index_prefix) or len(index) > 255:
            return False

        index_dt_str = index.split(self._index_prefix, 1)[-1]
        try:
            datetime.strptime(index_dt_str, INDEX_DATE_FORMAT)
            return True
        except ValueError:
            logger.exception("Invalid date format (YYYY-MM-DD) for index: %s", index)
            return False

    def can_delete_index(self, index, cutoff_date):
        """
        Check if the given index can be deleted based on the given index's date and cutoff date.
        """
        assert self._valid_index_name(index)
        index_dt = datetime.strptime(index[len(self._index_prefix) :], INDEX_DATE_FORMAT)
        return index_dt < cutoff_date and cutoff_date - index_dt >= timedelta(days=1)

    def list_indices(self):
        self._initialize()
        try:
            return self._client.indices.get(self._index_prefix + "*").keys()
        except NotFoundError as nfe:
            logger.exception("`%s` indices not found: %s", self._index_prefix, nfe.info)
            return []
        except AuthorizationException as ae:
            logger.exception("Unauthorized for indices `%s`: %s", self._index_prefix, ae.info)
            return None

    def delete_index(self, index):
        self._initialize()
        assert self._valid_index_name(index)

        try:
            self._client.indices.delete(index)
            return index
        except NotFoundError as nfe:
            logger.exception("`%s` indices not found: %s", index, nfe.info)
            return None
        except AuthorizationException as ae:
            logger.exception("Unauthorized to delete index `%s`: %s", index, ae.info)
            return None


def configure_es(
    host,
    port,
    access_key=None,
    secret_key=None,
    aws_region=None,
    index_prefix=None,
    use_ssl=True,
    index_settings=None,
):
    """
    For options in index_settings, refer to:

    https://www.elastic.co/guide/en/elasticsearch/guide/master/_index_settings.html
    some index settings are set at index creation time, and therefore, you should NOT
    change those settings once the index is set.
    """
    es_client = ElasticsearchLogs(
        host=host,
        port=port,
        access_key=access_key,
        secret_key=secret_key,
        aws_region=aws_region,
        index_prefix=index_prefix or INDEX_NAME_PREFIX,
        use_ssl=use_ssl,
        index_settings=index_settings,
    )
    es_client._initialize()
    return es_client
